/*******************************************************************************
*File name: download_and_prep_SIPP.do

*Description: 
Retrieve 1996, 2001, 2004, 2008 panels of SIPP core data from the Census FTP site. 

* Notes: 
From NBER website: The 1996 Panel Longitudinal Core files are cleaned, edited versions of the Core files. I use the PLC files rather than the core files.
PLC = Panel Longitudinal Core (the 1996 Panel Longitudinal Core files are BLS-cleaned and edited versions of the Core files, so use the PLC files rather than the Core files.
They replace the Full Panel files created for earlier panel years).

* Sources: 
* SIPP FTP site
* https://www2.census.gov/programs-surveys/sipp/data/datasets/
* Census intro 
* https://www.census.gov/programs-surveys/sipp.html
* NBER
* https://www.nber.org/research/data/survey-income-and-program-participation-sipp
********************************************************************************/



/*******************************************************************************
** SET ENVIRONMENT
*******************************************************************************/

clear all
set more off

** Set global macros
* path:         project root; every folder below is created inside it
* version:      version tag
* current_date: today's date rendered with the CCYYNNDD display format,
*               then stripped of surrounding blanks
global path "<insert file path>" /*UPDATE THE PATH*/
global version "v1"
global current_date: display %td_CCYYNNDD date(c(current_date), "DMY")
global current_date = strtrim("${current_date}")

** Set up working directory structure for the project
local workdir "${path}"

* This cd is intentionally NOT captured: if the project root is wrong (for
* example the placeholder above was never edited), stop here instead of
* creating the folder tree in whatever directory Stata is currently in.
cd "`workdir'"

* mkdir is captured because the folders already exist on a rerun
capture mkdir "data"
capture mkdir "data_output"
capture mkdir "documents"
capture mkdir "graphics"
capture mkdir "log"
capture mkdir "posted"
capture mkdir "scripts"
capture mkdir "NBER"

/*******************************************************************************
** DOWNLOAD SIPP DATA
*******************************************************************************/

local workdir "${path}/data"

** Panel settings for 1996 (update these locals manually if needed)
* year: four-digit panel year, yr: two-digit suffix used in file names,
* wave: number of core waves to download
local year 1996
local yr   96
local wave 12

cap mkdir "`workdir'/`year'"
cd "`workdir'/`year'"

** year 1996
* For each wave: download the zip from NBER, unzip it into the panel folder,
* and delete the zip. The URL must use the loop index i, not the wave count,
* otherwise the last wave is downloaded on every iteration.
forval i = 1/`wave' {
	copy "https://data.nber.org/sipp/`year'/sipp`yr'l`i'.zip" "sipp`yr'l`i'.zip", replace
	unzipfile "sipp`yr'l`i'.zip", replace
	rm "sipp`yr'l`i'.zip" // erase zip files
}

**** year 2001 2004 2008
* SIPP FTP site, https://www2.census.gov/programs-surveys/sipp/data/datasets/
* Note the number of waves vary by year
* It takes 2-3 minutes 

clear
foreach year in 2001 2004 2008 {

	* one folder per panel; capture covers the already-exists case
	cap mkdir "`workdir'/`year'"
	cd "`workdir'/`year'"

	* yr = two-digit suffix used in file names, wave = number of waves
	if (`year' == 2001) {
		local yr 01
		local wave 9
	}
	if (`year' == 2004) {
		local yr 04
		local wave 12
	}
	if (`year' == 2008) {
		local yr 08
		local wave 16
	}

	quietly forval i = 1/`wave' {
		* noisily is required: display output is suppressed inside quietly
		noisily display as result "Downloading data for `year'wave`i'.dta."

		* The download is not captured: unzipfile below cannot succeed without
		* the zip, so a failed download should stop with copy's own error.
		copy "https://www2.census.gov/programs-surveys/sipp/data/datasets/`year'/w`i'/l`yr'puw`i'.zip" "l`yr'puw`i'.zip", replace // overwrite
		unzipfile "l`yr'puw`i'.zip", replace // unzip files
		rm "l`yr'puw`i'.zip" // erase zip files

	} // end of i loop
} // end of year loop

/*******************************************************************************
** DOWNLOAD NBER DATA DICTIONARY
*******************************************************************************/


clear all
set more off

* Everything in this section is stored under the NBER folder of the project
local workdir "${path}/NBER"
capture cd "`workdir'"

**** Panels 1996 and 2001
foreach year in 1996 2001 {

	* two-digit suffix used in file names, and number of waves for the panel
	local yr = substr("`year'", 3, 2)
	local wave = cond(`year' == 1996, 12, 9)

	* panel folder plus its three subfolders; capture covers reruns
	capture mkdir "`workdir'/`year'"
	foreach sub in Dictionary Label longitudinal_weight {
		capture mkdir "`workdir'/`year'/`sub'"
	}

	cd "`workdir'/`year'"

	* one dictionary (.dct) and one label script (.do) per wave
	quietly forvalues i = 1/`wave' {
		copy "https://data.nber.org/sipp/`year'/sip`yr'w`i'.dct" "Dictionary/sipp`yr'w`i'.dct", replace // dictionary
		copy "https://data.nber.org/sipp/`year'/sip`yr'w`i'.do" "Label/sipp`yr'w`i'.do", replace // label
	}

	** longitudinal weight. only works for 1996 (left disabled)
	//cap copy "https://data.nber.org/sipp/`year'/sipp`year'ctl_fer.do" "longitudinal_weight/sipplgtwgt`year'w`wave'.do", replace
	//cap copy "https://data.nber.org/sipp/`year'/sipp`year'ctl_fer.dct" "longitudinal_weight/sipplgtwgt`year'w`wave'.dct", replace

	** longitudinal weight dictionary, intended for 2001 (2001 has no .do file).
	* The download is captured, so a missing file does not stop the loop.
	capture copy "https://data.nber.org/sipp/`year'/sip`yr'lw`wave'.dct" "longitudinal_weight/sipplgtwgt`year'w`wave'.dct", replace

} // end of year loop

**** 2004 and 2008
foreach year in 2004 2008 {

	* two-digit suffix used in file names, and number of waves for the panel
	local yr = substr("`year'", 3, 2)
	local wave = cond(`year' == 2004, 12, 16)

	* panel folder plus its three subfolders; capture covers reruns
	capture mkdir "`workdir'/`year'"
	foreach sub in Dictionary Label longitudinal_weight {
		capture mkdir "`workdir'/`year'/`sub'"
	}

	cd "`workdir'/`year'"

	**** The 2004/2008 file names on NBER differ from the 1996/2001 ones.
	* One dictionary (.dct) and one label script (.do) per wave.
	quietly forvalues i = 1/`wave' {
		copy "https://data.nber.org/sipp/`year'/sippl`yr'puw`i'.dct" "Dictionary/sippl`yr'puw`i'.dct", replace // dictionary
		copy "https://data.nber.org/sipp/`year'/sippl`yr'puw`i'.do" "Label/sippl`yr'puw`i'.do", replace // label
	} // end of wave loop

	** Longitudinal weight (left disabled here)
	//copy "https://data.nber.org/sipp/`year'/sipplgtwgt`year'w`wave'.do" "longitudinal_weight/sipplgtwgt`year'w`wave'.do", replace
	//copy "https://data.nber.org/sipp/`year'/sipplgtwgt`year'w`wave'.dct" "longitudinal_weight/sipplgtwgt`year'w`wave'.dct", replace

} // end of year loop



/*******************************************************************************
** DOWNLOAD SIPP TOPICAL MODULES, SECOND WAVE
*******************************************************************************/


clear all
set more off

* Topical module files are stored next to the core files, under data/
local workdir "${path}/data"
capture cd "`workdir'"

foreach year in 2001 2004 2008 {

	* two-digit suffix used in file names
	local yr = substr("`year'", 3, 2)

	* only the wave 2 topical module is retrieved
	local wave 2

	capture mkdir "`workdir'/`year'"
	cd "`workdir'/`year'"

	**** download from the Census site, unzip in place, then drop the archive
	copy "https://www2.census.gov/programs-surveys/sipp/data/datasets/`year'/w`wave'/p`yr'putm`wave'.zip" "p`yr'putm`wave'.zip", replace
	unzipfile "p`yr'putm`wave'.zip", replace
	rm "p`yr'putm`wave'.zip" // erase zip files
}


**** for 1996 only
** I download the data from NBER instead of the Census FTP site because the FTP site only has the SAS file. 

foreach year in 1996 {

	cap mkdir "`workdir'/`year'"
	cd "`workdir'/`year'"

	if (`year' == 1996) local yr 96
	local wave 2

	** panel 1996 topical module wave 2
	* The NBER archive is stored locally under the same name pattern used for
	* the other panels (p<yr>putm<wave>.zip).
	copy "https://data.nber.org/sipp/`year'/sipp`yr't`wave'.zip" "p`yr'putm`wave'.zip", replace
	unzipfile "p`yr'putm`wave'.zip", replace

	* Delete the archive under the name it was saved as; the NBER name
	* (sipp96t2.zip) never exists locally, so removing that name did nothing.
	cap rm "p`yr'putm`wave'.zip" // erase zip files

	* rename dat file so it's consistent with other years
	cap _renamefile sipp96t2.dat p96putm2.dat

}

** DOWNLOAD NBER DATA LABELS

foreach year in 1996 2001 {

	* two-digit suffix used in file names; only wave 2 is needed
	local yr = substr("`year'", 3, 2)
	local wave 2

	* dictionaries and label scripts live under NBER/, one folder per panel
	local workdir "${path}/NBER"
	cd "`workdir'/`year'"

	** Topical module dictionary and label script from NBER, saved under the
	** same name pattern used for the 2004/2008 files
	copy "https://data.nber.org/sipp/`year'/sip`yr't`wave'.dct" "Dictionary/sippp`yr'putm2.dct", replace // dictionary
	copy "https://data.nber.org/sipp/`year'/sip`yr't`wave'.do" "Label/sippp`yr'putm`wave'.do", replace // label
}

**** for 2004 and 2008
foreach year in 2004 2008 {

	* two-digit suffix used in file names; only wave 2 is needed
	local yr = substr("`year'", 3, 2)
	local wave 2

	* dictionaries and label scripts live under NBER/, one folder per panel
	local workdir "${path}/NBER"
	cd "`workdir'/`year'"

	** Topical module dictionary and label script from NBER
	copy "https://data.nber.org/sipp/`year'/sippp`yr'putm`wave'.dct" "Dictionary/sippp`yr'putm2.dct", replace // dictionary
	copy "https://data.nber.org/sipp/`year'/sippp`yr'putm`wave'.do" "Label/sippp`yr'putm`wave'.do", replace // label
}

** CREATE DTA FILE

clear all
set more off

* Raw .dat files and the resulting .dta files live under data/
local workdir "${path}/data"
cap cd "`workdir'"

* The topical module dictionaries are downloaded to NBER/<year>/Dictionary,
* not to the data folder, so they are read from there.
local dctdir "${path}/NBER"

foreach year in 1996 2001 2004 2008 {

	* two-digit suffix used in file names
	if (`year' == 1996) local yr 96
	if (`year' == 2001) local yr 01
	if (`year' == 2004) local yr 04
	if (`year' == 2008) local yr 08
	local wave 2

	* source DAT file (relative to the data folder)
	local dat_name "`year'/p`yr'putm`wave'.dat"

	* output '.dta' file (relative to the data folder)
	local dta_name "`year'/p`yr'putm`wave'.dta"

	* path to the data dictionary file
	local dct_name "`dctdir'/`year'/Dictionary/sippp`yr'putm`wave'.dct"

	* import the fixed-format file using the dictionary
	quietly infile using "`dct_name'", using("`dat_name'") clear

	* The label uses the wave local; no loop index or calyear local is
	* defined in this section.
	label data "SIPP `year' Topical Module wave `wave' created on `c(current_date)'"

	notes: File downloaded on `c(current_date)'

	saveold "`dta_name'", replace
}

**  LABEL DATA

clear all
set more off

* Paths below are relative to the project root
local workdir "${path}"
cd "`workdir'"

foreach year in 1996 2001 2004 2008 {

	* two-digit suffix used in file names
	if (`year' == 1996) local yr 96
	if (`year' == 2001) local yr 01
	if (`year' == 2004) local yr 04
	if (`year' == 2008) local yr 08

	local wave 2 // only need wave 2

	use "data/`year'/p`yr'putm`wave'.dta", clear

	* The NBER label script is left disabled. To use it, first comment out the
	* beginning/end of that do file where the in/out paths are specified.
	//do "NBER/`year'/Label/sippp`yr'putm`wave'.do"

	// Downloaded from https://www.nber.org/research/data/survey-income-and-program-participation-sipp
	// I commented out the beginning and the end of the NBER do files where they specify the file input and output locations

	* The undefined calyear local was dropped from the label text.
	label data "SIPP `year' topical modules wave `wave' created on `c(current_date)'"

	notes: File downloaded from Census FTP site and labeled using NBER dictionary on `c(current_date)'

	* show the output file name, then save under the <year>tm<wave> name
	display "data/`year'/`year'tm`wave'.dta"

	saveold "data/`year'/`year'tm`wave'.dta" , replace

} // end of year loop




/*******************************************************************************
** RETRIEVE DICTIONARY AND DOCUMENTS FROM NBER
*Download data dictionary, labels, and documentations for 1996, 2001, 2004, 2008 panels of SIPP core data from NBER
*https://www.nber.org/research/data/survey-income-and-program-participation-sipp
*******************************************************************************/

clear all
set more off

* Dictionaries, label scripts and weights are stored under NBER/
local workdir "${path}/NBER"
capture cd "`workdir'"

foreach year in 1996 2001 {

	* two-digit suffix used in file names, and number of waves for the panel
	local yr = substr("`year'", 3, 2)
	local wave = cond(`year' == 1996, 12, 9)

	* panel folder plus its three subfolders; capture covers reruns
	capture mkdir "`workdir'/`year'"
	foreach sub in Dictionary Label longitudinal_weight {
		capture mkdir "`workdir'/`year'/`sub'"
	}

	cd "`workdir'/`year'"

	**** 1996 and 2001
	* For 1996, use the PLC files. Each download is captured, so a file that
	* cannot be fetched is skipped without stopping the loop.
	quietly forvalues i = 1/`wave' {
		capture copy "https://data.nber.org/sipp/`year'/sip`yr'l`i'.dct" "Dictionary/sipp`yr'w`i'.dct", replace // dictionary
		capture copy "https://data.nber.org/sipp/`year'/sip`yr'l`i'.do" "Label/sipp`yr'w`i'.do", replace // label
	}

	** Documentation files are mostly identical for all waves, so one copy is
	** enough. Doesn't work for 1996 though (left disabled).
	// copy "https://data.nber.org/sipp/`year'/l`yr'puw1d.txt" "l`year'puw1.txt", replace 
	// copy "https://data.nber.org/sipp/`year'/`year'w1.pdf" "`year'w1.pdf", replace 

	** longitudinal weight. only works for 1996
	* No replace option on these: an existing local file makes copy fail, and
	* capture swallows that failure.
	capture copy "https://data.nber.org/sipp/`year'/sipp`year'ctl_fer.do" "longitudinal_weight/sipplgtwgt`year'w`wave'.do"
	capture copy "https://data.nber.org/sipp/`year'/sipp`year'ctl_fer.dct" "longitudinal_weight/sipplgtwgt`year'w`wave'.dct"

	** for 2001 dictionary (2001 has no .do file)
	capture copy "https://data.nber.org/sipp/`year'/sip`yr'lw`wave'.dct" "longitudinal_weight/sipplgtwgt`year'w`wave'.dct"

}

foreach year in 2004 2008 {

	* two-digit suffix used in file names, and number of waves for the panel
	local yr = substr("`year'", 3, 2)
	local wave = cond(`year' == 2004, 12, 16)

	* panel folder plus its three subfolders; capture covers reruns
	capture mkdir "`workdir'/`year'"
	foreach sub in Dictionary Label longitudinal_weight {
		capture mkdir "`workdir'/`year'/`sub'"
	}

	cd "`workdir'/`year'"

	**** 2004 and 2008
	* one dictionary (.dct) and one label script (.do) per wave
	quietly forvalues i = 1/`wave' {
		copy "https://data.nber.org/sipp/`year'/sippl`yr'puw`i'.dct" "Dictionary/sippl`yr'puw`i'.dct", replace // dictionary
		copy "https://data.nber.org/sipp/`year'/sippl`yr'puw`i'.do" "Label/sippl`yr'puw`i'.do", replace // label
	}

	** Longitudinal weight: label script and dictionary, named after the
	** panel year and its last wave
	copy "https://data.nber.org/sipp/`year'/sipplgtwgt`year'w`wave'.do" "longitudinal_weight/sipplgtwgt`year'w`wave'.do", replace
	copy "https://data.nber.org/sipp/`year'/sipplgtwgt`year'w`wave'.dct" "longitudinal_weight/sipplgtwgt`year'w`wave'.dct", replace

}


/*******************************************************************************
** CREATE SIPP STATA FILE
Create stata format files for core waves
*******************************************************************************/


clear all
set more off

* Raw .dat files and the resulting .dta files live under data/
local workdir "${path}/data"
cd "`workdir'"


**** 2004/2008 and 1996/2001 have different file names.

** 2004 and 2008
** This will take roughly 1 minute per wave

foreach year in 2004 2008 {

	* yr = two-digit suffix used in file names, wave = number of waves
	if (`year' == 2004) local yr 04
	if (`year' == 2004) local wave 12

	if (`year' == 2008) local yr 08
	if (`year' == 2008) local wave 16

	quietly forval i = 1/`wave' {
		* source DAT file (relative to the data folder)
		local dat_name "`year'/l`yr'puw`i'.dat"

		* output '.dta' file (relative to the data folder)
		local dta_name "`year'/sippl`yr'puw`i'.dta"

		* path to the data dictionary file. The dictionaries are downloaded
		* to NBER/<year>/Dictionary, not to the data folder.
		local dct_name "${path}/NBER/`year'/Dictionary/sippl`yr'puw`i'.dct"

		* import the fixed-format file using the dictionary
		quietly infile using "`dct_name'", using("`dat_name'") clear

		* The undefined calyear local was dropped from the label text.
		label data "SIPP `year' panel wave `i' created on `c(current_date)'"

		notes: File downloaded on `c(current_date)'

		saveold "`dta_name'", replace
	} // end of wave loop
} // end of year loop


** 1996
** This will take roughly 1 minute per wave

foreach year in 1996 {

	* yr = two-digit suffix used in file names, wave = number of waves
	if (`year' == 1996) local yr 96
	if (`year' == 1996) local wave 12

	quietly forval i = 1/`wave' {
		* source DAT file (PLC file name, relative to the current directory)
		local dat_name "`year'/sipp`yr'l`i'.dat"

		* output '.dta' file, named like the other panels
		local dta_name "`year'/sippl`yr'puw`i'.dta"

		* path to the data dictionary file. The 1996 dictionaries are
		* downloaded to NBER/<year>/Dictionary under the sipp<yr>w<i> name.
		local dct_name "${path}/NBER/`year'/Dictionary/sipp`yr'w`i'.dct"

		* import the fixed-format file using the dictionary
		quietly infile using "`dct_name'", using("`dat_name'") clear

		* The undefined calyear local was dropped from the label text.
		label data "SIPP `year' panel wave `i' created on `c(current_date)'"

		notes: File downloaded on `c(current_date)'

		saveold "`dta_name'", replace

	}
}


** 2001
** This will take roughly 1 minute per wave

foreach year in 2001 {

	* yr = two-digit suffix used in file names, wave = number of waves
	if (`year' == 2001) local yr 01
	if (`year' == 2001) local wave 9

	quietly forval i = 1/`wave' {
		* source DAT file (relative to the current directory)
		local dat_name "`year'/l`yr'puw`i'.dat"

		* output '.dta' file
		local dta_name "`year'/sippl`yr'puw`i'.dta"

		* path to the data dictionary file, 2001 file names are different.
		* The dictionaries are downloaded to NBER/<year>/Dictionary, not to
		* the data folder.
		local dct_name "${path}/NBER/`year'/Dictionary/sipp`yr'w`i'.dct"

		* import the fixed-format file using the dictionary
		quietly infile using "`dct_name'", using("`dat_name'") clear

		* The undefined calyear local was dropped from the label text.
		label data "SIPP `year' panel wave `i' created on `c(current_date)'"

		notes: File downloaded on `c(current_date)'

		saveold "`dta_name'", replace
	}
}
 


/********************************************************************************
** LABEL SIPP
*Description: 
This file will label the SIPP data.
I discarded the locals in the NBER label do files. Specifically, I made two changes manually.
- Block commented out the locals at the very beginning of the do file, from the first row to "#delimit ;" (this applies to all years)
- Commented out "saveold `dta_name' , replace" -- the last line. (this doesn't apply to 1996 to 2004 because the original scripts don't include the command)

********************************************************************************/


clear all
set more off

* Paths below are relative to the project root
local workdir "${path}"
cap cd "`workdir'"

/*******************************************************************************
** 1. LABEL DATA
*******************************************************************************/

** 2001 and 1996
foreach year in 1996 2001 {

	* yr = two-digit suffix used in file names, wave = number of waves
	if (`year' == 1996) local yr 96
	if (`year' == 1996) local wave 12

	if (`year' == 2001) local yr 01
	if (`year' == 2001) local wave 9

	forval i = 1/`wave' {

		use "data/`year'/sippl`yr'puw`i'.dta", clear

		* NBER label script, left disabled
		//do "NBER/`year'/Label/sipp`yr'w`i'.do"

		* The undefined calyear local was dropped from the label text.
		label data "SIPP `year' panel wave `i' created on `c(current_date)'"

		notes: File downloaded from Census FTP site and labeled using NBER dictionary on `c(current_date)'

		* show the output file name, then save under the <year>w<i> name
		display "data/`year'/`year'w`i'.dta"

		saveold "data/`year'/`year'w`i'.dta" , replace

	}
}


foreach year in 2004 2008 {

	* yr = two-digit suffix used in file names, wave = number of waves
	if (`year' == 2004) local yr 04
	if (`year' == 2004) local wave 12

	if (`year' == 2008) local yr 08
	if (`year' == 2008) local wave 16

	**** 2004/2008 and 1996/2001 have different file names.
	** 2004 and 2008
	forval i = 1/`wave' {

		use "data/`year'/sippl`yr'puw`i'.dta", clear

		* NBER label script, left disabled
		//do "NBER/`year'/Label/sippl`yr'puw`i'.do" 
		// Downloaded from https://www.nber.org/research/data/survey-income-and-program-participation-sipp
		// I commented out the beginning and the end of the NBER do files where they specify the file input and output locations

		* The undefined calyear local was dropped from the label text.
		label data "SIPP `year' panel wave `i' created on `c(current_date)'"

		notes: File downloaded from Census FTP site and labeled using NBER dictionary on `c(current_date)'

		* show the output file name, then save under the <year>w<i> name
		display "data/`year'/`year'w`i'.dta"

		saveold "data/`year'/`year'w`i'.dta" , replace
	}
}


/*******************************************************************************
** END OF FILE
*******************************************************************************/


 
 